####################################
# Title - Event-Based Framing of Democracy in American News Media
# Date - May 28th, 2024
# Goal - Prevalence of each model of democracy
####################################

# Start from an empty workspace.
# NOTE(review): this deletes every object in the calling session's
# environment; running the script in a fresh R session would make this line
# unnecessary -- consider removing it.
rm(list = ls())

  # Library
  library(quanteda)
  library(dplyr)
  library(text2vec)
  library(conText)
  library(ggplot2)
  library(lubridate)
  library(tidyr)
  library(PerformanceAnalytics) #correlation plot
  
  # Inputs read from disk:
  #   data              - cleaned article data (one row per article)
  #   glove_vectors     - pre-trained GloVe embeddings (already trained by
  #                       Stanford researchers, not fitted here)
  #   transform_vectors - transformation matrix passed to conText below
  data <- readRDS("data/nyt_cleaned_articles.rds")
  glove_vectors <- readRDS("data/glove.rds")
  transform_vectors <- readRDS("data/khodakA.rds")

#########################
#
# Preprocessing
#
#########################

  # Keep categorical copies of publication month and year (used as grouping
  # variables), then store the year itself as a number.
  # NOTE(review): if `pub_year` is stored as a factor, as.numeric() returns
  # the level codes rather than the years -- confirm its type in the RDS.
  data[["pub_month_factor"]] <- as.factor(data[["pub_month"]])
  data[["pub_year_factor"]] <- as.factor(data[["pub_year"]])
  data[["pub_year"]] <- as.numeric(data[["pub_year"]])

  # Constant column: every document gets the value 1.
  data[["global"]] <- 1
  
  # Build a quanteda corpus from the cleaned article text, using `docid` as
  # the document identifier.
  # NOTE(review): quanteda documents `meta` as a named list of corpus-level
  # metadata, so `meta = TRUE` is probably not what was intended -- confirm
  # before changing or removing it.
  text_corpus <- corpus(data,
                        docid_field = "docid",
                        text_field = "clean_text",
                        meta = TRUE)

  # tokenize corpus removing unnecessary (i.e. semantically uninformative)
  # elements; numbers are kept
  toks <- tokens(text_corpus,
                 remove_punct = TRUE,
                 remove_symbols = TRUE,
                 remove_numbers = FALSE,
                 remove_url = TRUE,
                 remove_separators = TRUE)

  # clean out stopwords and words with 2 or fewer characters
  toks_nostop <- tokens_select(toks,
                               pattern = stopwords("en"),
                               selection = "remove",
                               min_nchar = 3)

  # only use features that appear at least 5 times in the corpus
  feats <- dfm(toks_nostop, tolower = TRUE, verbose = FALSE) %>%
    dfm_trim(min_termfreq = 5) %>%
    featnames()

  # check spelling. toupper avoids names being considered misspelled.
  # hunspell is optional and is called through its namespace instead of being
  # attached, so hunspell::dictionary() does not mask quanteda::dictionary().
  if (requireNamespace("hunspell", quietly = TRUE)) {
    spellcheck <- hunspell::hunspell_check(toupper(feats),
                                           dict = hunspell::dictionary("en_US"))
    feats <- feats[spellcheck]
  } else {
    # The feature set (and everything downstream) differs without this
    # filter, so say so instead of skipping it silently.
    message("Package 'hunspell' is not installed; skipping the spell-check filter on features.")
  }

  # leave the pads so that non-adjacent words will not become adjacent
  toks_nostop_feats <- tokens_select(toks_nostop, feats, padding = TRUE)
  
  
  # Collect the context windows (6 tokens on each side) around every
  # occurrence of the target terms. Matching is fixed-string and
  # case-sensitive rather than regex-based, so longer words that merely
  # contain a target term are not picked up; the case variants to match are
  # therefore listed explicitly. The keyword itself is kept in the window.
  demo_toks <- tokens_context(
    x = toks_nostop_feats,
    pattern = c(
      "democracy", "Democracy", "DEMOCRACY",
      "democracies", "Democracies", "DEMOCRACIES",
      "democracy's", "Democracy's", "DEMOCRACY's",
      "democratic"
    ),
    valuetype = "fixed",
    case_insensitive = FALSE,
    window = 6L,
    hard_cut = FALSE,
    rm_keyword = FALSE,
    verbose = TRUE
  )

  # The intermediate objects are not used again; drop them.
  rm(list = c("text_corpus", "toks", "toks_nostop", "feats"))
  
  # document-feature matrix of the context windows
  demo_dfm <- dfm(demo_toks)

  # document-embedding matrix: one embedding per context window, computed from
  # the pre-trained GloVe vectors (300-dim) and the transformation matrix
  demo_dem <- dem(
    x = demo_dfm,
    pre_trained = glove_vectors,
    transform = TRUE,
    transform_matrix = transform_vectors,
    verbose = TRUE
  )

  # a single "corpus-wide" embedding is the column average of all context
  # embeddings, stored as a one-row matrix labelled "democracy"
  demo_wv <- matrix(colMeans(demo_dem), ncol = ncol(demo_dem))
  rownames(demo_wv) <- "democracy"
  dim(demo_wv)
  
##############################
#
# 2. Cosine similarity for each dictionary
#
##############################
  
  # Keyword dictionaries, one per model of democracy. The order of the terms
  # within each vector is kept as originally defined.

  # electoral
  elec_dict <- c(
    "multiparty", "one-vote", "enfranchisement", "suffrage",
    "voter", "voters", "election", "voting", "vote", "votes", "ballot"
  )

  # liberal
  lib_dict <- c(
    "pluralism", "freedom", "freedoms", "liberty", "liberties", "rights",
    "individuality", "constitutional", "constitutionalism", "constitutions"
  )

  # participatory
  parti_dict <- c(
    "activism", "grassroots", "grass-roots", "movement", "demonstrations",
    "participation", "rallies", "protest", "protests", "plebiscite"
  )

  # deliberative
  delib_dict <- c(
    "deliberative", "consensus-building", "consensus",
    "dialogue", "thoughtful", "deliberation"
  )

  # egalitarian
  egal_dict <- c(
    "egalitarian", "egalitarianism", "equality", "unequal", "inequality",
    "inequalities", "entitlement", "welfare", "disparities", "disparity",
    "equal"
  )

  # authoritarian
  autho_dict <- c(
    "meritocratic", "elites", "elitist", "centralized", "top-down", "charisma"
  )
  
#############################
#
# Calculate cosine similarity
#
#############################
    
#Electoral democracy
  #   set.seed(2021L)
  #   elec_demo_cos_month<-get_cos_sim(x = demo_toks,
  #                                    groups = docvars(demo_toks, 'pub_month_factor'),
  #                                    features = elec_dict,
  #                                    pre_trained = glove_vectors,
  #                                    transform = TRUE,
  #                                    transform_matrix = transform_vectors,
  #                                    bootstrap = TRUE,
  #                                    num_bootstraps = 100,
  #                                    confidence_level = 0.95,
  #                                    stem = FALSE,  # We are going to stem. So, don't put star at keywords
  #                                    as_list = TRUE)
  #   set.seed(2021L)
  #   elec_demo_cos_year<-get_cos_sim(x = demo_toks,
  #                                    groups = docvars(demo_toks, 'pub_year_factor'),
  #                                    features = elec_dict,
  #                                    pre_trained = glove_vectors,
  #                                    transform = TRUE,
  #                                    transform_matrix = transform_vectors,
  #                                    bootstrap = TRUE,
  #                                    num_bootstraps = 100,
  #                                    confidence_level = 0.95,
  #                                    stem = FALSE,  # We are going to stem. So, don't put star at keywords
  #                                    as_list = TRUE)
  # 
  # #Liberal democracy
  #   set.seed(2021L)
  #   lib_demo_cos_month<-get_cos_sim(x = demo_toks,
  #                                    groups = docvars(demo_toks, 'pub_month_factor'),
  #                                    features = lib_dict,
  #                                    pre_trained = glove_vectors,
  #                                    transform = TRUE,
  #                                    transform_matrix = transform_vectors,
  #                                    bootstrap = TRUE,
  #                                    num_bootstraps = 100,
  #                                    confidence_level = 0.95,
  #                                    stem = FALSE,
  #                                    as_list = TRUE)
  #   set.seed(2021L)
  #   lib_demo_cos_year<-get_cos_sim(x = demo_toks,
  #                                   groups = docvars(demo_toks, 'pub_year_factor'),
  #                                   features = lib_dict,
  #                                   pre_trained = glove_vectors,
  #                                   transform = TRUE,
  #                                   transform_matrix = transform_vectors,
  #                                   bootstrap = TRUE,
  #                                   num_bootstraps = 100,
  #                                   confidence_level = 0.95,
  #                                   stem = FALSE,
  #                                   as_list = TRUE)
  # 
  # #Participatory democracy
  #   set.seed(2021L)
  #   parti_demo_cos_month<-get_cos_sim(x = demo_toks,
  #                                   groups = docvars(demo_toks, 'pub_month_factor'),
  #                                   features = parti_dict,
  #                                   pre_trained = glove_vectors,
  #                                   transform = TRUE,
  #                                   transform_matrix = transform_vectors,
  #                                   bootstrap = TRUE,
  #                                   num_bootstraps = 100,
  #                                   confidence_level = 0.95,
  #                                   stem = FALSE,
  #                                   as_list = TRUE)
  #   set.seed(2021L)
  #   parti_demo_cos_year<-get_cos_sim(x = demo_toks,
  #                                     groups = docvars(demo_toks, 'pub_year_factor'),
  #                                     features = parti_dict,
  #                                     pre_trained = glove_vectors,
  #                                     transform = TRUE,
  #                                     transform_matrix = transform_vectors,
  #                                     bootstrap = TRUE,
  #                                     num_bootstraps = 100,
  #                                     confidence_level = 0.95,
  #                                     stem = FALSE,
  #                                     as_list = TRUE)
  # 
  # #Deliberative democracy
  #   set.seed(2021L)
  #   delib_demo_cos_month<-get_cos_sim(x = demo_toks,
  #                                   groups = docvars(demo_toks, 'pub_month_factor'),
  #                                   features = delib_dict,
  #                                   pre_trained = glove_vectors,
  #                                   transform = TRUE,
  #                                   transform_matrix = transform_vectors,
  #                                   bootstrap = TRUE,
  #                                   num_bootstraps = 100,
  #                                   confidence_level = 0.95,
  #                                   stem = FALSE,
  #                                   as_list = TRUE)
  #   set.seed(2021L)
  #   delib_demo_cos_year<-get_cos_sim(x = demo_toks,
  #                                     groups = docvars(demo_toks, 'pub_year_factor'),
  #                                     features = delib_dict,
  #                                     pre_trained = glove_vectors,
  #                                     transform = TRUE,
  #                                     transform_matrix = transform_vectors,
  #                                     bootstrap = TRUE,
  #                                     num_bootstraps = 100,
  #                                     confidence_level = 0.95,
  #                                     stem = FALSE,
  #                                     as_list = TRUE)
  # 
  # #Egalitarian democracy
  #   set.seed(2021L)
  #   egal_demo_cos_month<-get_cos_sim(x = demo_toks,
  #                                     groups = docvars(demo_toks, 'pub_month_factor'),
  #                                     features = egal_dict,
  #                                     pre_trained = glove_vectors,
  #                                     transform = TRUE,
  #                                     transform_matrix = transform_vectors,
  #                                     bootstrap = TRUE,
  #                                     num_bootstraps = 100,
  #                                     confidence_level = 0.95,
  #                                     stem = FALSE,
  #                                     as_list = TRUE)
  #   set.seed(2021L)
  #   egal_demo_cos_year<-get_cos_sim(x = demo_toks,
  #                                    groups = docvars(demo_toks, 'pub_year_factor'),
  #                                    features = egal_dict,
  #                                    pre_trained = glove_vectors,
  #                                    transform = TRUE,
  #                                    transform_matrix = transform_vectors,
  #                                    bootstrap = TRUE,
  #                                    num_bootstraps = 100,
  #                                    confidence_level = 0.95,
  #                                    stem = FALSE,
  #                                    as_list = TRUE)
  # 
  # #Authoritarian democracy
  #   set.seed(2021L)
  #   autho_demo_cos_month<-get_cos_sim(x = demo_toks,
  #                                    groups = docvars(demo_toks, 'pub_month_factor'),
  #                                    features = autho_dict,
  #                                    pre_trained = glove_vectors,
  #                                    transform = TRUE,
  #                                    transform_matrix = transform_vectors,
  #                                    bootstrap = TRUE,
  #                                    num_bootstraps = 100,
  #                                    confidence_level = 0.95,
  #                                    stem = FALSE,
  #                                    as_list = TRUE)
  #   set.seed(2021L)
  #   autho_demo_cos_year<-get_cos_sim(x = demo_toks,
  #                                     groups = docvars(demo_toks, 'pub_year_factor'),
  #                                     features = autho_dict,
  #                                     pre_trained = glove_vectors,
  #                                     transform = TRUE,
  #                                     transform_matrix = transform_vectors,
  #                                     bootstrap = TRUE,
  #                                     num_bootstraps = 100,
  #                                     confidence_level = 0.95,
  #                                     stem = FALSE,
  #                                     as_list = TRUE)

  # word_demo_cosine_month<-list(elec_demo_cos_month, lib_demo_cos_month, parti_demo_cos_month,
  #                        delib_demo_cos_month, egal_demo_cos_month,autho_demo_cos_month)
  # word_demo_cosine_year<-list(elec_demo_cos_year, lib_demo_cos_year, parti_demo_cos_year,
  #                              delib_demo_cos_year, egal_demo_cos_year,autho_demo_cos_year)
  # 
  # saveRDS(word_demo_cosine_month, "data/word_demo2_cosine_month.rds")
  # saveRDS(word_demo_cosine_year,  "data/word_demo2_cosine_year.rds")

##############################
#
# 3. Visualization - Month
#
##############################
    # word_demo_cosine_month <-readRDS("data/word_demo2_cosine_month.rds")
    # 
    # elec_demo_cos_month <-word_demo_cosine_month[[1]]
    # lib_demo_cos_month  <-word_demo_cosine_month[[2]]
    # parti_demo_cos_month<-word_demo_cosine_month[[3]]
    # delib_demo_cos_month<-word_demo_cosine_month[[4]]
    # egal_demo_cos_month <-word_demo_cosine_month[[5]]
    # autho_demo_cos_month<-word_demo_cosine_month[[6]]
    # 
    # a<-unique(data$pub_month_factor)
    # 
    # result_df_cos <- data.frame(target = NA,
    #                              feature = NA,
    #                              value = NA,
    #                              std.error = NA,
    #                              lower.ci = NA,
    #                              upper.ci = NA,
    #                              demo = NA)
    # for (month_factor in a) {
    #   b <- elec_demo_cos_month[[month_factor]] %>% as.data.frame %>% mutate(demo = "Electoral Democracy")
    #   c <- lib_demo_cos_month[[month_factor]] %>% as.data.frame %>% mutate(demo = "Liberal Democracy")
    #   d <- parti_demo_cos_month[[month_factor]] %>% as.data.frame %>% mutate(demo = "Participatory Democracy")
    #   e <- delib_demo_cos_month[[month_factor]] %>% as.data.frame %>% mutate(demo = "Deliberative Democracy")
    #   f <- egal_demo_cos_month[[month_factor]] %>% as.data.frame %>% mutate(demo = "Egalitarian Democracy")
    #   g <- autho_demo_cos_month[[month_factor]] %>% as.data.frame %>% mutate(demo = "Authoritarian Democracy")
    #   result_df_cos <- rbind(result_df_cos, b,c,d,e,f,g)
    # }
    # result_df_cos<-result_df_cos[-1,]
    # result_df_cos$target<-ymd(result_df_cos$target)
    # 
    # saveRDS(result_df_cos, "data/keyword_demo2_cosine_month.rds")
    
    # Load the cached per-keyword monthly cosine similarities (long format:
    # one row per month `target`, keyword `feature` and model `demo`).
    result_df_cos <- readRDS("data/keyword_demo2_cosine_month.rds")

  # Visualization
    library(ggplot2)
    library(ggpubr) # ggarrange, annotate_figure, text_grob

    # Overall range of the similarities; only needed if the shared ylim()
    # inside the helper below is switched back on.
    y_high <- max(result_df_cos$value)
    y_low <- min(result_df_cos$value)

    # Draw one line per dictionary keyword for a single model of democracy.
    #   cos_df      data frame with columns target, feature, value and demo
    #   model_label value of `demo` whose rows are plotted
    #   zero_line   add a horizontal reference line at y = 0?
    # Returns a ggplot object.
    plot_feature_trend_month <- function(cos_df, model_label, zero_line = FALSE) {
      fig <- cos_df %>%
        filter(demo == model_label) %>%
        ggplot() +
        geom_line(aes(x = target, y = value, color = feature, group = feature))
      if (zero_line) {
        fig <- fig + geom_hline(yintercept = 0.0)
      }
      fig +
        xlab("") +
        ylab("") +
        # ylim(y_low, y_high) +
        theme_bw() +
        theme(axis.text.x = element_text(angle = 45, hjust = 1))
    }

    # NOTE(review): only the electoral panel had a zero reference line in the
    # original code; that is preserved here -- confirm whether it should apply
    # to all panels.
    fig_elec <- plot_feature_trend_month(result_df_cos, "Electoral Democracy", zero_line = TRUE)
    fig_lib <- plot_feature_trend_month(result_df_cos, "Liberal Democracy")
    fig_parti <- plot_feature_trend_month(result_df_cos, "Participatory Democracy")
    fig_delib <- plot_feature_trend_month(result_df_cos, "Deliberative Democracy")
    fig_egal <- plot_feature_trend_month(result_df_cos, "Egalitarian Democracy")
    fig_autho <- plot_feature_trend_month(result_df_cos, "Authoritarian Democracy")

    # ggarrange()'s `label.y` is the position of panel labels, not an axis
    # title, so the shared y-axis title is added with annotate_figure().
    annotate_figure(
      ggarrange(fig_elec, fig_lib, fig_parti, fig_delib, fig_egal, fig_autho),
      left = text_grob("cosine similarity", rot = 90)
    )
    
  # Calculate the mean
    # elect_result<-result_df_cos %>% filter(demo == "Electoral Democracy") %>% group_by(target) %>% 
    #   summarise(prevalence = mean(value),
    #             demo = "Electoral Democracy")
    # liberal_result<-result_df_cos %>% filter(demo == "Liberal Democracy") %>% group_by(target) %>% 
    #   summarise(prevalence = mean(value),
    #             demo = "Liberal Democracy")
    # partici_result<-result_df_cos %>% filter(demo == "Participatory Democracy") %>% group_by(target) %>% 
    #   summarise(prevalence = mean(value),
    #             demo = "Participatory Democracy")
    # delib_result<-result_df_cos %>% filter(demo == "Deliberative Democracy") %>% group_by(target) %>% 
    #   summarise(prevalence = mean(value),
    #             demo = "Deliberative Democracy")
    # egal_result<-result_df_cos %>% filter(demo == "Egalitarian Democracy") %>% group_by(target) %>% 
    #   summarise(prevalence = mean(value),
    #             demo = "Egalitarian Democracy")
    # autho_result<-result_df_cos %>% filter(demo == "Authoritarian Democracy") %>% group_by(target) %>% 
    #   summarise(prevalence = mean(value),
    #             demo = "Authoritarian Democracy")
    # 
    # result_dem_mean_month<-rbind(elect_result, liberal_result, partici_result, delib_result, egal_result, autho_result)
    # result_dem_mean_month$target<-ymd(result_dem_mean_month$target)
    # result_dem_mean_month<-result_dem_mean_month %>% mutate(model_num = case_when(
    #   demo == "Electoral Democracy" ~ 1,
    #   demo == "Liberal Democracy" ~ 2,
    #   demo == "Participatory Democracy" ~ 3,
    #   demo == "Deliberative Democracy" ~ 4,
    #   demo == "Egalitarian Democracy" ~ 5,
    #   demo == "Authoritarian Democracy" ~ 6
    # ))
    # 
    # saveRDS(result_dem_mean_month, "data/dictionary_demo2_cosine_mean_month.rds")
    
    # Load the cached monthly mean similarity ("prevalence") per model.
    result_dem_mean_month <- readRDS("data/dictionary_demo2_cosine_mean_month.rds")

    # Overall range; only needed if the shared ylim() below is switched on.
    y_high <- max(result_dem_mean_month$prevalence)
    y_low <- min(result_dem_mean_month$prevalence)

    # One facet per model (ordered by model_num, free y scales) with the
    # monthly points and a smoothed trend.
    fig_mean_month <- result_dem_mean_month %>%
      ggplot() +
      geom_point(aes(x = target, y = prevalence, color = demo)) +
      geom_smooth(aes(x = target, y = prevalence), method = "auto") + # "auto" gives a smoother curve
      xlab("") +
      ylab("") +
      # ylim(y_low, y_high) +
      theme_bw() +
      theme(axis.text.x = element_text(angle = 45, hjust = 1),
            legend.position = "none") +
      facet_wrap(~reorder(demo, model_num), scales = "free_y", ncol = 2)
    fig_mean_month

    # Save exactly this figure (not whatever last_plot() happens to be) and
    # make sure the output directory exists first.
    dir.create("fig", showWarnings = FALSE, recursive = TRUE)
    ggsave(filename = "fig/all_dictionary2_similarity_trend_month.jpeg",
           plot = fig_mean_month, width = 12, height = 8, dpi = 1000)

    # Summary statistics: mean and standard deviation of the monthly
    # prevalence for each model of democracy.
    result_dem_mean_month %>%
      group_by(demo) %>%
      summarise(
        mean_prevalence = mean(prevalence),
        std = sd(prevalence)
      )

    # Linear time trend of the monthly prevalence, fitted separately for each
    # model of democracy.
    summary(lm(
      prevalence ~ target,
      data = result_dem_mean_month %>% filter(demo == "Electoral Democracy")
    ))
    summary(lm(
      prevalence ~ target,
      data = result_dem_mean_month %>% filter(demo == "Liberal Democracy")
    ))
    summary(lm(
      prevalence ~ target,
      data = result_dem_mean_month %>% filter(demo == "Participatory Democracy")
    ))
    summary(lm(
      prevalence ~ target,
      data = result_dem_mean_month %>% filter(demo == "Deliberative Democracy")
    ))
    summary(lm(
      prevalence ~ target,
      data = result_dem_mean_month %>% filter(demo == "Egalitarian Democracy")
    ))
    summary(lm(
      prevalence ~ target,
      data = result_dem_mean_month %>% filter(demo == "Authoritarian Democracy")
    ))
    
  # Correlation between the monthly prevalence series of the models
    # Reshape to wide format: one column per model of democracy, keyed by the
    # remaining columns once model_num is dropped.
    result_dem_mean_month_wide <- result_dem_mean_month %>%
      select(!model_num) %>%
      pivot_wider(names_from = demo, values_from = prevalence)

    # Pairwise Pearson correlations with histograms, excluding `target`
    chart.Correlation(
      select(result_dem_mean_month_wide, !target),
      histogram = TRUE,
      method = "pearson"
    )

    # A single pair
    cor.test(
      result_dem_mean_month_wide$`Electoral Democracy`,
      result_dem_mean_month_wide$`Authoritarian Democracy`
    )

    # Regress each model's prevalence on all remaining columns.
    # NOTE(review): `.` expands to every other column of the wide table, and
    # `target` is still one of them here (it is only dropped for the
    # correlation chart) -- confirm that including it is intended.
    summary(lm(
      `Electoral Democracy` ~ .,
      data = result_dem_mean_month_wide
    ))
    summary(lm(
      `Liberal Democracy` ~ .,
      data = result_dem_mean_month_wide
    ))
    summary(lm(
      `Participatory Democracy` ~ .,
      data = result_dem_mean_month_wide
    ))
    summary(lm(
      `Deliberative Democracy` ~ .,
      data = result_dem_mean_month_wide
    ))
    summary(lm(
      `Egalitarian Democracy` ~ .,
      data = result_dem_mean_month_wide
    ))
    summary(lm(
      `Authoritarian Democracy` ~ .,
      data = result_dem_mean_month_wide
    ))
    
##############################
#
# 4. Visualization - Year
#
##############################

    # Load the cached per-keyword yearly cosine similarities: a list with one
    # element per model of democracy, unpacked by position below.
    word_demo_cosine_year <- readRDS("data/word_demo2_cosine_year.rds")

    elec_demo_cos_year <- word_demo_cosine_year[[1]]
    lib_demo_cos_year <- word_demo_cosine_year[[2]]
    parti_demo_cos_year <- word_demo_cosine_year[[3]]
    delib_demo_cos_year <- word_demo_cosine_year[[4]]
    egal_demo_cos_year <- word_demo_cosine_year[[5]]
    autho_demo_cos_year <- word_demo_cosine_year[[6]]

    # Pair each model label with its per-year results so that all six models
    # go through the same code path.
    cos_year_by_model <- list(
      "Electoral Democracy" = elec_demo_cos_year,
      "Liberal Democracy" = lib_demo_cos_year,
      "Participatory Democracy" = parti_demo_cos_year,
      "Deliberative Democracy" = delib_demo_cos_year,
      "Egalitarian Democracy" = egal_demo_cos_year,
      "Authoritarian Democracy" = autho_demo_cos_year
    )

    # Year labels as character strings: they index the per-model lists by
    # name (indexing with the factor itself would use its integer codes).
    year_labels <- as.character(unique(data$pub_year_factor))

    # Stack the results year by year and, within a year, model by model,
    # tagging every row with its model label in `demo`. Building the pieces
    # first and binding once avoids growing a data frame inside a loop and
    # the all-NA seed row that had to be removed afterwards.
    result_df_cos <- bind_rows(lapply(year_labels, function(year_label) {
      bind_rows(lapply(names(cos_year_by_model), function(model_label) {
        cos_year_by_model[[model_label]][[year_label]] %>%
          as.data.frame() %>%
          mutate(demo = model_label)
      }))
    }))
    
    # Visualization
    library(ggplot2)
    library(ggpubr) # ggarrange, annotate_figure, text_grob

    # Common y-axis limits so the six panels are directly comparable.
    # na.rm = TRUE keeps the limits defined even if some similarities are
    # missing.
    y_high <- max(result_df_cos$value, na.rm = TRUE)
    y_low <- min(result_df_cos$value, na.rm = TRUE)

    # Draw one line per dictionary keyword for a single model of democracy.
    #   cos_df      data frame with columns target, feature, value and demo
    #   model_label value of `demo` whose rows are plotted
    #   y_min, y_max limits applied to the y axis
    # Returns a ggplot object.
    plot_feature_trend_year <- function(cos_df, model_label, y_min, y_max) {
      cos_df %>%
        filter(demo == model_label) %>%
        ggplot() +
        geom_line(aes(x = target, y = value, color = feature, group = feature)) +
        xlab("") +
        ylab("") +
        ylim(y_min, y_max) +
        theme_bw() +
        theme(axis.text.x = element_text(angle = 45, hjust = 1))
    }

    fig_elec <- plot_feature_trend_year(result_df_cos, "Electoral Democracy", y_low, y_high)
    fig_lib <- plot_feature_trend_year(result_df_cos, "Liberal Democracy", y_low, y_high)
    fig_parti <- plot_feature_trend_year(result_df_cos, "Participatory Democracy", y_low, y_high)
    fig_delib <- plot_feature_trend_year(result_df_cos, "Deliberative Democracy", y_low, y_high)
    fig_egal <- plot_feature_trend_year(result_df_cos, "Egalitarian Democracy", y_low, y_high)
    fig_autho <- plot_feature_trend_year(result_df_cos, "Authoritarian Democracy", y_low, y_high)

    # ggarrange()'s `label.y` is the position of panel labels, not an axis
    # title, so the shared y-axis title is added with annotate_figure().
    annotate_figure(
      ggarrange(fig_elec, fig_lib, fig_parti, fig_delib, fig_egal, fig_autho),
      left = text_grob("cosine similarity", rot = 90)
    )
    
    # Calculate the mean
    # Prevalence of a model in a year = mean cosine similarity over that
    # model's dictionary keywords. Display order of the models:
    model_levels <- c(
      "Electoral Democracy",
      "Liberal Democracy",
      "Participatory Democracy",
      "Deliberative Democracy",
      "Egalitarian Democracy",
      "Authoritarian Democracy"
    )

    result_dem_mean_year <- result_df_cos %>%
      group_by(demo, target) %>%
      summarise(prevalence = mean(value), .groups = "drop") %>%
      mutate(
        # Go through character first: as.numeric() on a factor would return
        # the level codes instead of the years.
        target = as.numeric(as.character(target)),
        # 1..6 following model_levels; NA for any unexpected label
        model_num = as.numeric(match(demo, model_levels))
      ) %>%
      # same row and column layout as stacking the models one after another
      arrange(model_num, target) %>%
      select(target, prevalence, demo, model_num)
  
    # One facet per model (ordered by model_num, free y scales) showing the
    # yearly prevalence as points joined by a line.
    fig_mean_year <- result_dem_mean_year %>%
      ggplot() +
      geom_point(aes(x = target, y = prevalence, color = demo, group = demo)) +
      # geom_smooth(aes(x = target, y = prevalence), method = "lm") + # "auto" gives a smoother curve
      geom_line(aes(x = target, y = prevalence, color = demo, group = demo)) +
      xlab("") +
      ylab("") +
      theme_bw() +
      theme(axis.text.x = element_text(angle = 45, hjust = 1),
            legend.position = "none") +
      facet_wrap(~reorder(demo, model_num), scales = "free_y", ncol = 2)
    fig_mean_year

    # Save exactly this figure (not whatever last_plot() happens to be) and
    # make sure the output directory exists first.
    dir.create("fig", showWarnings = FALSE, recursive = TRUE)
    ggsave(filename = "fig/all_dictionary2_similarity_trend_year.jpeg",
           plot = fig_mean_year, width = 12, height = 8, dpi = 1000)

    # Linear time trend of the yearly prevalence, fitted separately for each
    # model of democracy.
    summary(lm(prevalence ~ target, data = result_dem_mean_year %>% filter(demo == "Electoral Democracy")))
    summary(lm(prevalence ~ target, data = result_dem_mean_year %>% filter(demo == "Liberal Democracy")))
    summary(lm(prevalence ~ target, data = result_dem_mean_year %>% filter(demo == "Participatory Democracy")))
    summary(lm(prevalence ~ target, data = result_dem_mean_year %>% filter(demo == "Deliberative Democracy")))
    summary(lm(prevalence ~ target, data = result_dem_mean_year %>% filter(demo == "Egalitarian Democracy")))
    summary(lm(prevalence ~ target, data = result_dem_mean_year %>% filter(demo == "Authoritarian Democracy")))
    